version 16
set more off

***************************************************************************************************
*THIS FILE PRODUCES FIGURE A.V OF SLEMROD-REHMAN-WASEEM-2020, RESTAT
***************************************************************************************************

/* PRELIMINARIES */

* Load the baseline name-frequency data and keep years before 2012.
* A forward slash is used as the path separator: Stata accepts it on Windows,
* macOS and Unix, whereas a backslash only works on Windows.
use				"$project_data/ITRM_Dislo_Unique_SelfEmployed_BaselineNameFreq.dta", clear
keep				if year<2012

* Rebuild the unit counter so it equals 1 on every observation.
* -capture- keeps the script running if the dataset has no variable "one".
capture drop		one
g					one=1

* N = number of observations in each year (denominator of the percentage below).
* total() is the documented egen function; sum() is its legacy name.
bys				year: egen N=total(one)

* Bin namefreq_baseline into the nine categories shown on the x axis.
* Stata treats missing as larger than any number, so an unguarded
* "namefreq_baseline>5000" would place missing frequencies in the top bin.
* Here interval is left missing for those observations, which removes them
* from the plotted points; they are still counted in N, as before.
* Nonmissing values below 1 keep interval==0, as in the original coding.
g					interval=0 if !missing(namefreq_baseline)
replace			interval=1 if namefreq_baseline==1
replace			interval=2 if namefreq_baseline>1 & namefreq_baseline<=5
replace			interval=3 if namefreq_baseline>5 & namefreq_baseline<=10
replace			interval=4 if namefreq_baseline>10 & namefreq_baseline<=20
replace			interval=5 if namefreq_baseline>20 & namefreq_baseline<=50
replace			interval=6 if namefreq_baseline>50 & namefreq_baseline<=100
replace			interval=7 if namefreq_baseline>100 & namefreq_baseline<=500
replace			interval=8 if namefreq_baseline>500 & namefreq_baseline<=5000
replace			interval=9 if namefreq_baseline>5000 & !missing(namefreq_baseline)

* numbinterv = observations in each year-by-bin cell; fracinterv = its share of the year total, in percent.
bys				year interval: egen numbinterv=total(one)
g					fracinterv=(numbinterv/N)*100

* index numbers observations within each year-by-bin cell; the figures plot
* only index==1 so that each cell contributes a single point.
bys 				year interval: g index=_n
* Figure: number of observations in each name-frequency bin, one connected
* line per year (2006-2011). Only index==1 is plotted so each year-by-bin
* cell contributes one point. The y axis is in raw counts, relabelled in thousands.
* Lines are continued with /// rather than "#delimit ;" so the do-file is
* never left in semicolon-delimiter mode after this command.
twoway	(connected numbinterv interval if year==2006 & index==1, sort lcolor(gs9)    mcolor(gs9)    msymbol(o)  lwidth(thick)) ///
		(connected numbinterv interval if year==2007 & index==1, sort lcolor(teal)   mcolor(teal)   msymbol(+)  lwidth(thick)) ///
		(connected numbinterv interval if year==2008 & index==1, sort lcolor(brown)  mcolor(brown)  msymbol(sh) lwidth(thick)) ///
		(connected numbinterv interval if year==2009 & index==1, sort lcolor(navy)   mcolor(navy)   msymbol(d)  lwidth(thick)) ///
		(connected numbinterv interval if year==2010 & index==1, sort lcolor(maroon) mcolor(maroon) msymbol(s)  lwidth(thick)) ///
		(connected numbinterv interval if year==2011 & index==1, sort lcolor(green)  mcolor(green)  msymbol(t)  lwidth(thick)), ///
		ytitle("Number of Taxpayers (000s)") xtitle("Name Frequency") ///
		ylabel(0 50000 "50" 100000 "100" 150000 "150") ///
		xlabel(1 "1" 2 "2-5" 3 "6-10" 4 "11-20" 5 "21-50" 6 "51-100" 7 "101-500" 8 "501-5000" 9 ">5000", angle(90)) ///
		legend(region(style(none)) label(1 "2006") label(2 "2007") label(3 "2008") ///
		label(4 "2009") label(5 "2010") label(6 "2011") rows(2)) ///
		graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white)

* Forward slash in the path: accepted by Stata on Windows, macOS and Unix.
graph export "$project_output/Namefreq_baseline_Distribution_Number.eps", replace

#delimit cr
* The line above must come first: it returns to carriage-return delimiting in
* case an earlier command left semicolon mode active, so the comments and
* /// continuations below are parsed correctly.
*
* Figure: percentage of each year's observations falling in each
* name-frequency bin, one connected line per year (2006-2011). Only index==1
* is plotted so each year-by-bin cell contributes one point.
twoway	(connected fracinterv interval if year==2006 & index==1, sort lcolor(gs9)    mcolor(gs9)    msymbol(o)  lwidth(thick)) ///
		(connected fracinterv interval if year==2007 & index==1, sort lcolor(teal)   mcolor(teal)   msymbol(+)  lwidth(thick)) ///
		(connected fracinterv interval if year==2008 & index==1, sort lcolor(brown)  mcolor(brown)  msymbol(sh) lwidth(thick)) ///
		(connected fracinterv interval if year==2009 & index==1, sort lcolor(navy)   mcolor(navy)   msymbol(d)  lwidth(thick)) ///
		(connected fracinterv interval if year==2010 & index==1, sort lcolor(maroon) mcolor(maroon) msymbol(s)  lwidth(thick)) ///
		(connected fracinterv interval if year==2011 & index==1, sort lcolor(green)  mcolor(green)  msymbol(t)  lwidth(thick)), ///
		ytitle("Fraction of Taxpayers (%)") xtitle("Name Frequency") ylabel(0(10)30) ///
		xlabel(1 "1" 2 "2-5" 3 "6-10" 4 "11-20" 5 "21-50" 6 "51-100" 7 "101-500" 8 "501-5000" 9 ">5000", angle(90)) ///
		legend(region(style(none)) label(1 "2006") label(2 "2007") label(3 "2008") ///
		label(4 "2009") label(5 "2010") label(6 "2011") rows(2)) ///
		graphregion(fcolor(white) style(none) color(white) margin(0 2 0 2)) bgcolor(white)

* Forward slash in the path: accepted by Stata on Windows, macOS and Unix.
* NOTE(review): this file name has no underscore between "baseline" and
* "Distribution", unlike the counts figure; left unchanged because other
* files may reference it by this exact name.
graph export "$project_output/Namefreq_baselineDistribution_Fraction.eps", replace

